Affine
将来自输入数据的若干行按照一个“context”(偏移集合)拼接/裁切成一个中间矩阵,再对该中间矩阵与权重做一次矩阵乘和偏置加法操作,最后应用激活函数(如果指定),得到输出结果。
该算子支持全量运行和增量运行两种模式,并维护了上一次全窗口的输出(previous_output)以支持增量更新。
- 输入:
input0 - 输入数据张量地址。
input1 - 权重矩阵地址。
input2 - 偏置向量地址。
input0_shape - 输入数据形状数组,长度为3,第一维值为1。
input1_shape - 权重矩阵形状数组,长度为3,第一维值为1。
input2_shape - 偏置向量形状数组,长度为3,第一维值为1。
output_shape - 输出张量形状数组,长度为3,第一维值为1。
context - 上下文索引数组,值递增。
context_size - 上下文大小。
output_dim - 拼接后中间矩阵的列数(即 full_input / increment_input 的最后一维,也是权重矩阵的行数),等于输入张量最后一维大小乘以上下文大小;注意它不是 output 的最后一维。
activation_type - 激活函数类型,0-8。
is_full_run - 全量运行标志指针,所指值为1时为全量更新(运行后修改为0),为0时为增量更新。
full_input - 全量输入缓冲区地址。
full_input_shape - 全量输入形状数组。
increment_input - 增量输入缓冲区地址。
increment_input_shape - 增量输入形状数组。
increment_output - 增量输出缓冲区地址。
increment_output_shape - 增量输出形状数组。
previous_output - 先前输出缓冲区地址。
previous_output_shape - 先前输出形状数组。
core_mask(int, 可选) - 核掩码(仅适用于共享存储版本)。
- 输出:
output - 仿射变换结果张量地址。
- 支持平台:
FT78NE、MT7004
备注
FT78NE 支持的数据类型:int8, fp32
MT7004 支持的数据类型:fp16, fp32
激活函数类型定义:
#define ActivationType_NO_ACTIVATION 0 // 无激活函数
#define ActivationType_RELU 1 // ReLU激活函数
#define ActivationType_RELU6 2 // ReLU6激活函数
#define ActivationType_SIGMOID 3 // Sigmoid激活函数
#define ActivationType_TANH 4 // Tanh激活函数
#define ActivationType_SWISH 5 // Swish激活函数
#define ActivationType_HSWISH 6 // Hard Swish激活函数
#define ActivationType_HSIGMOID 7 // Hard Sigmoid激活函数
#define ActivationType_SOFTPLUS 8 // Softplus激活函数
激活函数数学公式:
ReLU: \(f(x) = \max(0, x)\)
ReLU6: \(f(x) = \min(\max(0, x), 6)\)
Sigmoid: \(f(x) = \frac{1}{1 + e^{-x}}\)
Tanh: \(f(x) = \frac{e^x - e^{-x}}{e^x + e^{-x}}\)
Swish: \(f(x) = x \cdot \sigma(x) = \frac{x}{1 + e^{-x}}\)
Hard Swish: \(f(x) = x \cdot \frac{\min(\max(x + 3, 0), 6)}{6}\)
Hard Sigmoid: \(f(x) = \frac{\min(\max(x + 3, 0), 6)}{6}\)
Softplus: \(f(x) = \ln(1 + e^x)\)
参数数组结构:
long long params[21];
params[0] = (long long)input0; // 输入数据张量地址
params[1] = (long long)input1; // 权重矩阵地址
params[2] = (long long)input2; // 偏置向量地址
params[3] = (long long)output; // 输出张量地址
params[4] = (long long)input0_shape; // 输入数据形状数组
params[5] = (long long)input1_shape; // 权重矩阵形状数组
params[6] = (long long)input2_shape; // 偏置向量形状数组
params[7] = (long long)output_shape; // 输出张量形状数组
params[8] = (long long)context; // 上下文索引数组
params[9] = (long long)context_size; // 上下文大小
params[10] = (long long)output_dim; // 输出维度
params[11] = (long long)activation_type; // 激活函数类型
params[12] = (long long)&is_full_run; // 全量运行标志指针
params[13] = (long long)full_input; // 全量输入缓冲区地址
params[14] = (long long)full_input_shape; // 全量输入形状数组
params[15] = (long long)increment_input; // 增量输入缓冲区地址
params[16] = (long long)increment_input_shape; // 增量输入形状数组
params[17] = (long long)increment_output; // 增量输出缓冲区地址
params[18] = (long long)increment_output_shape; // 增量输出形状数组
params[19] = (long long)previous_output; // 先前输出缓冲区地址
params[20] = (long long)previous_output_shape; // 先前输出形状数组
共享存储版本:
- void i8_affine_s(long long *params, int core_mask)
- void fp_affine_s(long long *params, int core_mask)
- void hp_affine_s(long long *params, int core_mask)
C调用示例:
1// FT78NE 多核示例 2#include <stdio.h> 3#include <stdlib.h> 4#include <time.h> 5#include <affine.h> 6 7void test_fp_affine_s(int a, int b, int c, int o, int activation_type, int full_run, int core_mask) { 8 int i = 0, j = 0; 9 srand(time(0)); 10 11 int core_id = DNUM; 12 int logic_core_id = GetLogicCoreId(core_mask, core_id); 13 int num = GetCoreNum(core_mask); 14 15 int is_full_run = full_run; 16 int context[] = {-1, 0, 1, 2}; 17 int context_size = c; 18 int output_dim = b * c; 19 20 // 形状定义 21 int input0_shape[3] = {1, a, b}; 22 int input1_shape[3] = {1, b * c, o}; 23 int input2_shape[3] = {1, a - c + 1, o}; 24 int output_shape[3] = {1, a - c + 1, o}; 25 26 // 中间缓冲区形状 27 int full_input_shape[3] = {1, input0_shape[1] - (context[context_size - 1] - context[0]), output_dim}; 28 int increment_input_shape[3] = {1, 1, output_dim}; 29 int increment_output_shape[3] = {1, 1, output_shape[2]}; 30 int previous_output_shape[3] = {1, output_shape[1], output_shape[2]}; 31 32 // 内存分配 33 float* input0 = (float*)(0xA0400000); 34 float* input1 = (float*)(0xA0400000 + 0x100000); 35 float* input2 = (float*)(0xA0400000 + 0x200000); 36 float* output = (float*)(0xA0400000 + 0x300000); 37 float* full_input = (float*)(0xA0400000 + 0x400000); 38 float* increment_input = (float*)(0xA0400000 + 0x500000); 39 float* increment_output = (float*)(0xA0400000 + 0x600000); 40 float* previous_output = (float*)(0xA0400000 + 0x700000); 41 42 // 初始化数据 43 if (logic_core_id == 0) { 44 int input0_len = input0_shape[0] * input0_shape[1] * input0_shape[2]; 45 int input1_len = input1_shape[0] * input1_shape[1] * input1_shape[2]; 46 int input2_len = input2_shape[0] * input2_shape[1] * input2_shape[2]; 47 48 for (i = 0; i < input0_len; i++) { 49 input0[i] = ((float)rand() / RAND_MAX) * 2 - 1; 50 } 51 for (i = 0; i < input1_len; i++) { 52 input1[i] = ((float)rand() / RAND_MAX) * 2 - 1; 53 } 54 for (i = 0; i < input2_shape[2]; i++) { 55 input2[i] = ((float)rand() / RAND_MAX) * 2 - 1; 56 for (j = 1; j < 
input2_shape[1]; j++) { 57 input2[i + j * input2_shape[2]] = input2[i]; 58 } 59 } 60 } 61 62 // 准备参数数组 63 long long params[21]; 64 params[0] = (long long)input0; 65 params[1] = (long long)input1; 66 params[2] = (long long)input2; 67 params[3] = (long long)output; 68 params[4] = (long long)input0_shape; 69 params[5] = (long long)input1_shape; 70 params[6] = (long long)input2_shape; 71 params[7] = (long long)output_shape; 72 params[8] = (long long)context; 73 params[9] = (long long)context_size; 74 params[10] = (long long)output_dim; 75 params[11] = (long long)activation_type; 76 params[12] = (long long)&is_full_run; 77 params[13] = (long long)full_input; 78 params[14] = (long long)full_input_shape; 79 params[15] = (long long)increment_input; 80 params[16] = (long long)increment_input_shape; 81 params[17] = (long long)increment_output; 82 params[18] = (long long)increment_output_shape; 83 params[19] = (long long)previous_output; 84 params[20] = (long long)previous_output_shape; 85 86 // 执行 Affine 操作 87 fp_affine_s(params, core_mask); 88} 89 90int main(void) { 91 int a = 23, b = 31, c = 4, o = 29; 92 int activation_type = 0; // 激活函数类型 93 int full_run = 1; // 全量运行标志 94 int core_mask = 0xff; // 核掩码 95 96 test_fp_affine_s(a, b, c, o, activation_type, full_run, core_mask); 97 return 0; 98}
私有存储版本:
- void i8_affine_p(long long *params)
- void fp_affine_p(long long *params)
- void hp_affine_p(long long *params)
C调用示例:
1// FT78NE 单核示例 2#include <stdio.h> 3#include <affine.h> 4 5int main(void) { 6 // 参数设置(与共享版本类似) 7 int a = 32, b = 16, c = 4, o = 16; 8 int is_full_run = full_run; 9 int context[] = {-1, 0, 1, 2}; 10 int context_size = c; 11 int output_dim = b * c; 12 13 int input0_shape[3] = {1, a, b}; 14 int input1_shape[3] = {1, b * c, o}; 15 int input2_shape[3] = {1, a - c + 1, o}; 16 int output_shape[3] = {1, a - c + 1, o}; 17 18 int full_input_shape[3] = {1, input0_shape[1] - (context[context_size - 1] - context[0]), output_dim}; 19 int increment_input_shape[3] = {1, 1, output_dim}; 20 int increment_output_shape[3] = {1, 1, output_shape[2]}; 21 int previous_output_shape[3] = {1, output_shape[1], output_shape[2]}; 22 23 float* input0 = (float*)(0x10810000); 24 float* input1 = (float*)(0x10810000 + 0x100000); 25 float* input2 = (float*)(0x10810000 + 0x200000); 26 float* output = (float*)(0x10810000 + 0x300000); 27 float* full_input = (float*)(0x10810000 + 0x400000); 28 float* increment_input = (float*)(0x10810000 + 0x500000); 29 float* increment_output = (float*)(0x10810000 + 0x600000); 30 float* previous_output = (float*)(0x10810000 + 0x700000); 31 32 // 准备参数数组(与共享版本相同) 33 long long params[21]; 34 params[0] = (long long)input0; 35 params[1] = (long long)input1; 36 params[2] = (long long)input2; 37 params[3] = (long long)output; 38 params[4] = (long long)input0_shape; 39 params[5] = (long long)input1_shape; 40 params[6] = (long long)input2_shape; 41 params[7] = (long long)output_shape; 42 params[8] = (long long)context; 43 params[9] = (long long)context_size; 44 params[10] = (long long)output_dim; 45 params[11] = (long long)activation_type; 46 params[12] = (long long)&is_full_run; 47 params[13] = (long long)full_input; 48 params[14] = (long long)full_input_shape; 49 params[15] = (long long)increment_input; 50 params[16] = (long long)increment_input_shape; 51 params[17] = (long long)increment_output; 52 params[18] = (long long)increment_output_shape; 53 params[19] = (long 
long)previous_output; 54 params[20] = (long long)previous_output_shape; 55 56 // 调用 Affine 57 fp_affine_p(params); 58 return 0; 59}